library(tidyverse) # for graphing and data cleaning
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggthemes) # for even more plotting themes
library(gganimate) # for adding animation layers to ggplots
library(RColorBrewer) # for color palettes
library(viridis)
## Loading required package: viridisLite
library(plotly) # for the ggplotly() - basic interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
library(gifski) # need the library for creating gifs but don't need to load each time
library(gt)
theme_set(theme_minimal()) # My favorite ggplot() theme :)
# Load the "women in headlines" CSVs published by The Pudding on GitHub.
# Each file is read into a plain data.frame with base read.csv().

# Word frequencies with a `theme` column (reshaped further down).
freq_theme_words <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/word_themes_freq.csv"
)

# Word frequencies with a `country` column (reshaped further down).
freq_country_words <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/word_country_freq.csv"
)

# Headline data per site -- presumably one row per outlet; TODO confirm.
headline_site <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/headlines_site.csv"
)

# Ranked words per theme (`rank`, `word`, `count`, `theme` are used below).
word_theme_rank <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/word_themes_rank.csv"
)

# Individual headlines with their site, country and bias score.
headline_examples <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/headlines.csv"
)

# Polarity of women-related vs. baseline headlines, per site.
polarity_site <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/polarity_comparison_site_country_time.csv"
)

# Polarity of women-related vs. all headlines, per year.
polarity_over_time <- read.csv(
  file = "https://raw.githubusercontent.com/the-pudding/data/master/women-in-headlines/polarity_comparison_country_time.csv"
)
POLARITY CALCULATIONS We measure polarity by performing sentiment analysis on each headline using the Vader python package, where each headline gets a sentiment score from -1 to 1 (from more negative to more positive). Because we are interested in polarity, we take the absolute value of each headline’s score.
BIAS CALCULATIONS We measure gender bias by tracking the combined occurrence of gendered language and social stereotypes usually associated with women. We do this in two steps: 1) We check if a headline contains gendered language (i.e. “spokeswoman,” “chairwoman,” “she,” “her,” “bride,” “daughter,” “daughters,” “female,” “fiancee,” “girl,” “girlfriend” etc.). 2) If it contains gendered language, we then count the number of words that are considered to be social stereotypes about women (i.e. “weak,” “modest,” “virgin,” “slut,” “whore,” “sexy,” “feminine,” “sensitive,” “emotional,” “gentle,” “soft,” “pretty,” “bitch,” “sexual” etc.). Finally, we normalize this count for all headlines within each outlet as a score between 0 and 1, and we aggregate (i.e. average) this score for each outlet. (site from pudding https://pudding.cool/2022/02/women-in-headlines/)
A stacked (cumulative) bar graph of the words used to describe women in headlines. The words are divided into 5 main categories, with crime and violence having the most words and the highest total frequency. The graph is interactive: hovering over a segment shows the individual word and its frequency.
# Reshape the theme/word frequency table to long form: one row per
# (theme, word) pair with its frequency, then drop rows containing NA.
pivot_words <- na.omit(
  pivot_longer(
    freq_theme_words,
    cols = !theme,
    names_to = "word",
    values_to = "freq"
  )
)
# Stacked bar chart of word frequencies per theme. Rows labelled "No theme"
# are excluded. Each word gets its own fill colour (ordered by frequency);
# the `text` aesthetic is not used by ggplot2 itself but is picked up by
# ggplotly() for the tooltip.
word_plot <- ggplot(
  data = filter(pivot_words, theme != "No theme"),
  mapping = aes(
    x = theme,
    y = freq,
    fill = fct_reorder(word, freq),
    text = paste("word:", word)
  )
) +
  geom_col(color = "black") +
  scale_fill_viridis_d() +
  labs(
    title = "Cumulative Frequency of Words describing Women in Headlines",
    x = "",
    y = "Frequency"
  ) +
  # There are too many words for a legend to be readable.
  theme(legend.position = "none")

# Convert to an interactive plotly widget showing frequency and word.
ggplotly(p = word_plot, tooltip = c("y", "text"))
# Reshape the country/word table to long form (one row per country-word
# pair), discard the rows that came from the `X` column, and remove any
# rows with missing values.
pivot_country_word <- na.omit(
  filter(
    pivot_longer(
      freq_country_words,
      cols = !country,
      names_to = "word",
      values_to = "number"
    ),
    word != "X"
  )
)

# Show the reshaped table.
pivot_country_word
# Horizontal bars of the words ranked 1-5 within each theme, one facet per
# theme. The `X` column is dropped before plotting.
ggplot(
  data = select(filter(word_theme_rank, rank < 6), !X),
  mapping = aes(y = word, x = count)
) +
  geom_col(mapping = aes(fill = count)) +
  scale_fill_viridis_c(option = "viridis") +
  # Free scales: every theme has its own words and its own count range.
  facet_wrap(~theme, scales = "free") +
  labs(
    title = "Count of Top 5 words per Theme",
    y = "",
    x = ""
  ) +
  theme(legend.position = "none")
Smooth line graph displaying the average polarity of headlines over time, with the 2020 value marked by a data point. To do: place the top label above the line, which requires expanding the boundaries of the graph. We would also like to animate the graph over time, if time permits.
# Smoothed yearly average polarity: headlines about women (green) versus
# all headlines (black), with the 2020 end points marked and labelled.
polarity_over_time %>%
  group_by(year) %>%
  # One row per year. `year` is already carried along as the grouping key;
  # listing it again inside summarise() made every group return several
  # rows, which is deprecated since dplyr 1.1.0.
  summarise(women_mean = mean(women_polarity_mean),
            all_mean = mean(all_polarity_mean),
            .groups = "drop") %>%
  ggplot(aes(x = year)) +
  geom_smooth(aes(y = women_mean), color = "springgreen4", se = FALSE) +
  geom_smooth(aes(y = all_mean), color = "black", se = FALSE) +
  # Fixed markers and labels are drawn with annotate() so each is drawn
  # exactly once instead of once per data row.
  # NOTE(review): the y positions are hand-picked to sit on the smoothed
  # curves at 2020 -- re-check them if the data or smoothing changes.
  annotate("point", x = 2020.0, y = 0.425,
           color = "black", fill = "springgreen4",
           size = 5, stroke = 2, shape = 21) +
  annotate("point", x = 2020.0, y = 0.28, size = 2.5) +
  annotate("label", x = 2019.4, y = 0.40,
           label = "Headlines about\nwomen", color = "springgreen4") +
  annotate("label", x = 2019.4, y = 0.25,
           label = "Headlines about\nother topics") +
  scale_x_continuous(breaks = c(2010, 2012, 2014, 2016, 2018, 2020)) +
  labs(title = "Average of Polarity of News Headlines over Time",
       y = "",
       x = "") +
  # Strip all grid lines and keep only a black x axis line.
  theme(plot.title = element_text(hjust = 0.5),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.y = element_blank(),
        axis.line.x = element_line(color = "black"))
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
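Polarity by news site: each line segment runs from the site's base polarity (black point) to its polarity for headlines about women (green point), so the length of the segment shows the difference. Sites are ordered by their polarity for headlines about women, not by the size of the difference.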
# Dumbbell chart per news site: a segment from the base polarity (small
# black point) to the polarity of headlines about women (green point),
# coloured by the country of publication.
polarity_site %>%
  # Order the sites once, up front, so every layer shares the same factor
  # levels instead of depending on which layer trains the y scale first.
  mutate(site = fct_reorder(site, polarity_women)) %>%
  ggplot(aes(y = site)) +
  geom_segment(aes(x = polarity_base, xend = polarity_women,
                   yend = site, color = country_of_pub),
               size = 1) +
  scale_color_manual(values = c("India" = "purple4",
                                "South Africa" = "thistle3",
                                "UK" = "steelblue4",
                                "USA" = "mediumseagreen")) +
  geom_point(aes(x = polarity_base), size = 2) +
  geom_point(aes(x = polarity_women), color = "black", fill = "springgreen4",
             size = 3, stroke = 1, shape = 21) +
  # Title typo fixed: "Outlines" -> "Outlets".
  labs(title = "Polarity of News Outlets:\n Headlines about Women vs. Headlines about other topics",
       y = "",
       x = "Polarity",
       color = "Country of\nPublication") +
  theme(plot.title = element_text(hjust = 0.5))
Ten example headlines from 10 different news sites, sorted by lowest bias score
# The ten lowest-bias example headlines, at most one per site: sort by
# ascending bias, keep the first (lowest-bias) row for each site, take the
# first ten rows, and keep only the display columns under their table names.
last_ten_headlines <- headline_examples %>%
  arrange(bias) %>%
  distinct(site, .keep_all = TRUE) %>%
  slice(1:10) %>%
  select(Headline = headline_no_site,
         Site = site,
         Country = country,
         Bias = bias)
# gt table of the least biased examples with a uniform cell background.
# Columns are given with c(...) because `columns = vars(...)` is deprecated
# since gt 0.3.0.
last_ten_headlines_table <- gt(last_ten_headlines) %>%
  tab_header(title = "Least Biased Headline Examples") %>%
  data_color(columns = c(Headline, Site, Country, Bias),
             colors = "#bccae0")
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
# Display the finished "least biased" table.
last_ten_headlines_table
| Least Biased Headline Examples | |||
|---|---|---|---|
| Headline | Site | Country | Bias |
| 'Lady Bird' buzzes through young sexuality | iol.co.za | South Africa | 0 |
| American Woman, Divorced From Saudi Husband, Is Trapped in Saudi Arabia | msn.com | India | 0 |
| 'SA poorer without her' SACP reacts to Madikizela Mandela's death | News24.com | South Africa | 0 |
| WATCH \| North West farmer 'assaults man and mother' with knobkierie | Sowetanlive.co.za | South Africa | 0 |
| First look at Kim Kardashian and Kanye West's baby girl, Chicago | Timeslive.co.za | South Africa | 0 |
| Carly Fiorina Repeats After Girl: 'Donald Trump's a Moron' | abcnews.go.com | USA | 0 |
| KPMG's US CEO Lynne Doughtie explains why feeling uncomfortable in your job is actually a good thing | businessinsider.com | India | 0 |
| This Artist Makes Insanely Accurate Knitted People | buzzfeed.com | UK | 0 |
| University of Missouri professor Melissa Click: Video "doesn't represent the good I was doing" | cbsnews.com | USA | 0 |
| Jury convicts gang member in 2011 murder of pregnant teen | chicagotribune.com | USA | 0 |
Ten example headlines from 10 different news sites, sorted by highest bias score
# The ten highest-bias example headlines, at most one per site: round the
# bias score to three decimals, sort descending, keep the first
# (highest-bias) row for each site, take the first ten rows, and keep only
# the display columns under their table names.
top_ten_headlines <- headline_examples %>%
  mutate(Bias = round(bias, digits = 3)) %>%
  arrange(desc(Bias)) %>%
  distinct(site, .keep_all = TRUE) %>%
  slice(1:10) %>%
  select(Headline = headline_no_site,
         Site = site,
         Country = country,
         Bias)
# gt table of the most biased examples with a uniform cell background.
# Columns are given with c(...) because `columns = vars(...)` is deprecated
# since gt 0.3.0.
top_ten_headlines_table <- gt(top_ten_headlines) %>%
  tab_header(title = "Most Biased Headline Examples") %>%
  data_color(columns = c(Headline, Site, Country, Bias),
             colors = "#bccae0")
## Warning: `columns = vars(...)` has been deprecated in gt 0.3.0:
## * please use `columns = c(...)` instead
# Display the finished "most biased" table.
top_ten_headlines_table
| Most Biased Headline Examples | |||
|---|---|---|---|
| Headline | Site | Country | Bias |
| Girl with severe eczema told her mum she 'didn't want to look at herself in the mirror' she's now a model | manchestereveningnews.co.uk | UK | 1.000 |
| A Mother Said Her 9 Year Old Daughter Killed Herself Because She Was Bullied For Being Friends With A White Boy | buzzfeed.com | UK | 0.833 |
| Wuthering Heights actress Merle Oberon's secret that she took to the grave... her sister was her mother who ga | dailymail.co.uk | India | 0.833 |
| Woman reunited with her long lost brother reveals surprise as she discovers she's now her SISTER | dailyrecord.co.uk | UK | 0.833 |
| Gal Gadot’s Daughter Made Wonder Woman Actress Appreciate the Importance of Female Superheroes | hollywoodreporter.com | USA | 0.833 |
| Priyanka Chopra's Mother Reveals She Got Emotional Seeing Her Daughter Dressed up as Bride | india.com | India | 0.833 |
| ‘Why is there no Disney princess from India?’: Girl asks her mom and here’s what she does next | indianexpress.com | India | 0.833 |
| Woman regrets bum tattoo that reminds her of the time she defecated on herself beside boyfriend's mum | mirror.co.uk | UK | 0.833 |
| Lady Gaga's mom missed 'warning signs' of her daughter in 'crisis': 'For me, that’s a hard pill to still live with and swallow' | news.yahoo.com | India | 0.833 |
| Brooklyn woman whose mom died after she beat her up amid fight over cat suffers PTSD from life as prostitute | nydailynews.com | USA | 0.833 |